Close

% SIBGRAPI 2021 conference paper (IEEE Computer Society), exported from the
% urlib.net repository record identified by the `ibi` field.
% Notes on this entry:
%   - `doi` carries the DOI; `url` is the repository landing page only, since
%     a field name may appear just once per entry.
%   - Percent signs in `abstract` are escaped so the text can be typeset.
%   - Non-standard fields (affiliation, conference-location, conference-year,
%     ibi, targetfile, urlaccessdate) are ignored by standard styles and are
%     kept as repository metadata.
@InProceedings{SantosSiDaRoDrDu:2021:FoUnAp,
               author = "Santos, Gabriel Lavoura dos and Silva, Vanessa Telles da and 
                         Dalmolin, Laura de Aguiar and Rodrigues, Ricardo Nagel and 
                         Drews, Jr, Paulo Lilles Jorge and Duarte Filho, Nelson Lopes",
          affiliation = "Universidade Federal do Rio Grande, Brazil  and Universidade 
                         Federal do Rio Grande, Brazil  and Universidade Federal do Rio 
                         Grande, Brazil  and Universidade Federal do Rio Grande, Brazil  
                         and Universidade Federal do Rio Grande, Brazil  and Universidade 
                         Federal do Rio Grande, Brazil",
                title = "A Form Understanding Approach to Printed and Structured 
                         Engineering Documentation",
            booktitle = "Proceedings of the 34th Conference on Graphics, Patterns and 
                         Images ({SIBGRAPI})",
                 year = "2021",
               editor = "Paiva, Afonso and Menotti, David and Baranoski, Gladimir V. G. and 
                         Proen{\c{c}}a, Hugo Pedro and Junior, Antonio Lopes Apolinario 
                         and Papa, Jo{\~a}o Paulo and Pagliosa, Paulo and dos Santos, 
                         Thiago Oliveira and e S{\'a}, Asla Medeiros and da Silveira, 
                         Thiago Lopes Trugillo and Brazil, Emilio Vital and Ponti, Moacir 
                         A. and Fernandes, Leandro A. F. and Avila, Sandra",
         organization = "Conference on Graphics, Patterns and Images, 34. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "form understanding, text detection, spatial layout analysis",
             abstract = "A significant amount of companies still depends on printed 
                         documents, such as healthcare reports, engineering specifications, 
                         or historical documents. Those documents are diverse in terms of 
                         layout and content, thereby it requires different approaches for 
                         each document structure, which makes information extraction a 
                         costly and inefficient task. We classify documents into three 
                         categories, non-structured, semi-structured, and structured 
                         documents. The last one being the focus of the present work. We 
                         propose a pattern recognition method for structured documents with 
                         an anchoring relationship between question-answer objects through 
                         a system of hypotheses and a probability distribution in order to 
                         identify which predefined model the document belongs to. 
                         Therefore, acting as a system for both identification and content 
                         extraction to structured documents. The method has promising 
                         results for pattern recognition from all document models, with 
                         78\% to 97\% objects extracted correctly.",
  conference-location = "Gramado, RS, Brazil (virtual)",
      conference-year = "18-22 Oct. 2021",
                  doi = "10.1109/SIBGRAPI54419.2021.00052",
             language = "en",
                  ibi = "8JMKD3MGPEW34M/45CKPKE",
                  url = "http://urlib.net/ibi/8JMKD3MGPEW34M/45CKPKE",
           targetfile = "Sibgrapi_2021 - Paper ID 64.pdf",
        urlaccessdate = "2024, May 06"
}


Close